1. ライブラリ読み込み

library(Seurat)
library(ggplot2)
library(dplyr)
library(patchwork)
library(RColorBrewer)
library(pheatmap)
library(harmony)

2. データ読み込みと Stromal Cell の抽出

2.1 全細胞データの読み込み

full_obj <- readRDS("JIAsyno_CITEseq_full.rds")

cat("=== Full Dataset ===\n")
## === Full Dataset ===
cat("Cells:", ncol(full_obj), "\n")
## Cells: 205230
cat("Genes:", nrow(full_obj), "\n")
## Genes: 30803
cat("Assays:", paste(names(full_obj@assays), collapse = ", "), "\n")
## Assays: RNA, ADT

2.2 Stromal Cell の定義と抽出

Stromal cellとして以下のカテゴリーを global1 列から抽出:

  • Fibroblasts (~15,666 cells)
  • Endothelial (~8,197 cells)
  • Pericytes (~2,228 cells)
  • Lymphatic (~532 cells)
# Stromal cell types を定義
stromal_types <- c("Fibroblasts", "Endothelial", "Pericytes", "Lymphatic")

# global1 列に基づいてサブセット
stromal <- subset(full_obj, subset = global1 %in% stromal_types)

cat("=== Stromal Subset ===\n")
## === Stromal Subset ===
cat("Cells:", ncol(stromal), "\n")
## Cells: 26623
cat("\nCell type composition:\n")
## 
## Cell type composition:
print(table(stromal$global1))
## 
## Endothelial Fibroblasts   Lymphatic   Pericytes 
##        8197       15666         532        2228
# 元の詳細アノテーション(ct_subtype)の確認
cat("=== Original Subtypes (ct_subtype) ===\n")
## === Original Subtypes (ct_subtype) ===
print(sort(table(stromal$ct_subtype), decreasing = TRUE))
## 
##              Venous   CD34+ Fibroblasts  POSTN+ Fibroblasts CXCL12+ Fibroblasts 
##                6877                4130                3934                3285 
##           Pericytes CXCL14+ Fibroblasts      LL Fibroblasts       KDR+ Arterial 
##                2194                1431                1417                 871 
##    MMP+ Fibroblasts   SOX5+ Fibroblasts          Lymphatics    FLI1 + Capillary 
##                 805                 660                 535                 395 
## Cycling Fibroblasts 
##                  89

2.3 元の全体UMAPでのStromal Cell位置

# 全体UMAPの中でStromal cellsをハイライト
full_obj$is_stromal <- ifelse(
  full_obj$global1 %in% stromal_types, 
  as.character(full_obj$global1), 
  "Other"
)

p1 <- DimPlot(full_obj, group.by = "is_stromal", 
              cols = c("Fibroblasts" = "#e74c3c", 
                       "Endothelial" = "#3498db",
                       "Pericytes" = "#2ecc71", 
                       "Lymphatic" = "#9b59b6",
                       "Other" = "grey90"),
              pt.size = 0.1, order = stromal_types) +
  ggtitle("Full UMAP: Stromal Cells Highlighted") +
  theme(plot.title = element_text(face = "bold"))

p1

3. Subclustering パイプライン

3.1 正規化と変動遺伝子の検出

# RNA assay で再正規化
DefaultAssay(stromal) <- "RNA"
stromal <- NormalizeData(stromal, verbose = FALSE)
stromal <- FindVariableFeatures(stromal, 
                                 selection.method = "vst", 
                                 nfeatures = 3000,
                                 verbose = FALSE)

cat("Variable features:", length(VariableFeatures(stromal)), "\n")
## Variable features: 3000
# Top variable features の表示
top20 <- head(VariableFeatures(stromal), 20)
p_vf <- VariableFeaturePlot(stromal)
p_vf <- LabelPoints(plot = p_vf, points = top20, repel = TRUE, xnudge = 0, ynudge = 0)
p_vf + ggtitle("Top Variable Features in Stromal Cells")

3.2 スケーリングとPCA

stromal <- ScaleData(stromal, verbose = FALSE)
stromal <- RunPCA(stromal, npcs = 50, verbose = FALSE)
ElbowPlot(stromal, ndims = 50) +
  ggtitle("PCA Elbow Plot") +
  geom_vline(xintercept = 30, linetype = "dashed", color = "red") +
  annotate("text", x = 32, y = max(Stdev(stromal, reduction = "pca")) * 0.8,
           label = "PC30", color = "red")

p_pca1 <- DimPlot(stromal, reduction = "pca", group.by = "global1") +
  ggtitle("PCA: by Cell Type")
p_pca2 <- DimPlot(stromal, reduction = "pca", group.by = "sample") +
  ggtitle("PCA: by Sample (before Harmony)")
p_pca1 + p_pca2

3.3 Harmony バッチ補正

sample をバッチ変数としてHarmonyで統合:

n_dims <- 30

stromal <- RunHarmony(stromal, 
                      group.by.vars = "sample",
                      verbose = FALSE)
# Harmony補正後の埋め込みを確認
p_h1 <- DimPlot(stromal, reduction = "harmony", group.by = "global1") +
  ggtitle("Harmony: by Cell Type")
p_h2 <- DimPlot(stromal, reduction = "harmony", group.by = "sample") +
  ggtitle("Harmony: by Sample (after correction)")
p_h1 + p_h2

3.4 UMAP と クラスタリング(Harmony基準)

# Harmony補正済み埋め込みからUMAP・近傍グラフを構築
stromal <- RunUMAP(stromal, reduction = "harmony", dims = 1:n_dims, verbose = FALSE)
stromal <- FindNeighbors(stromal, reduction = "harmony", dims = 1:n_dims, verbose = FALSE)

複数の解像度でクラスタリングを実行し、比較:

resolutions <- c(0.3, 0.5, 0.8, 1.0)
for (res in resolutions) {
  stromal <- FindClusters(stromal, resolution = res, verbose = FALSE)
}

cat("Cluster counts by resolution:\n")
## Cluster counts by resolution:
for (res in resolutions) {
  col <- paste0("RNA_snn_res.", res)
  cat(sprintf("  res=%.1f: %d clusters\n", res, length(unique(stromal@meta.data[[col]]))))
}
##   res=0.3: 11 clusters
##   res=0.5: 13 clusters
##   res=0.8: 19 clusters
##   res=1.0: 20 clusters
plots <- list()
for (res in resolutions) {
  col <- paste0("RNA_snn_res.", res)
  Idents(stromal) <- stromal@meta.data[[col]]
  plots[[as.character(res)]] <- DimPlot(stromal, label = TRUE, label.size = 4) +
    ggtitle(paste0("Resolution = ", res)) +
    NoLegend()
}
wrap_plots(plots, ncol = 2)

3.5 解像度の選択

# res=0.5をデフォルトとして使用(必要に応じて変更)
selected_res <- 0.5
Idents(stromal) <- stromal@meta.data[[paste0("RNA_snn_res.", selected_res)]]
stromal$seurat_clusters <- Idents(stromal)

cat("Selected resolution:", selected_res, "\n")
## Selected resolution: 0.5
cat("Number of clusters:", length(unique(Idents(stromal))), "\n")
## Number of clusters: 13
print(table(Idents(stromal)))
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12 
## 6783 4200 3777 3373 1948 1507 1429 1004  772  641  531  370  288

4. クラスター特性の評価

4.1 元のアノテーションとの対応

# クラスターと元のglobal1の対応
p_ct1 <- DimPlot(stromal, group.by = "seurat_clusters", label = TRUE) +
  ggtitle("Subclusters") + NoLegend()
p_ct2 <- DimPlot(stromal, group.by = "global1") +
  ggtitle("Original Cell Type (global1)")
p_ct1 + p_ct2

# クラスター vs 元アノテーション のクロス集計
ct_table <- table(stromal$seurat_clusters, stromal$global1)
ct_prop <- prop.table(ct_table, margin = 1)  # 行ごとの割合

pheatmap(ct_prop,
         color = colorRampPalette(c("white", "#3498db", "#e74c3c"))(50),
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         display_numbers = TRUE,
         number_format = "%.2f",
         main = "Cluster Composition (proportion by global1)")

# クラスター vs ct_subtype
ct2_table <- table(stromal$seurat_clusters, stromal$ct_subtype)
ct2_prop <- prop.table(ct2_table, margin = 1)

# 0でない列のみ残す
ct2_prop <- ct2_prop[, colSums(ct2_prop) > 0]

pheatmap(ct2_prop,
         color = colorRampPalette(c("white", "#2ecc71", "#8e44ad"))(50),
         cluster_rows = FALSE,
         cluster_cols = TRUE,
         display_numbers = TRUE,
         number_format = "%.2f",
         fontsize = 8,
         main = "Cluster Composition (proportion by ct_subtype)")

4.2 マーカー遺伝子の発現

# Stromal cell の既知マーカー遺伝子
marker_genes <- c(
  # Fibroblasts (general)
  "COL1A1", "COL1A2", "DCN", "LUM", "VIM", "PDGFRA",
  # Fibroblast subtypes
  "CD34", "POSTN", "CXCL12", "CXCL14", "MMP1", "MMP3", "SOX5", "MKI67",
  # Endothelial
  "PECAM1", "VWF", "CDH5", "KDR", "ACKR1", "FLT1",
  # Pericytes / Mural cells
  "RGS5", "PDGFRB", "ACTA2", "MCAM", "NOTCH3",
  # Lymphatic
  "PROX1", "LYVE1", "PDPN", "FLT4", "CCL21"
)

# Seuratオブジェクトに存在する遺伝子のみフィルタ
marker_genes <- marker_genes[marker_genes %in% rownames(stromal)]

DotPlot(stromal, features = marker_genes, group.by = "seurat_clusters") +
  RotatedAxis() +
  ggtitle("Known Marker Genes by Cluster") +
  theme(axis.text.x = element_text(size = 9))

4.3 各クラスターのDEG(マーカー遺伝子検出)

# 全クラスターのマーカー遺伝子を検出
all_markers <- FindAllMarkers(stromal,
                               only.pos = TRUE,
                               min.pct = 0.25,
                               logfc.threshold = 0.5,
                               verbose = FALSE)

cat("Total significant markers found:", nrow(all_markers), "\n")
## Total significant markers found: 11282
# Top 5 per cluster
top5 <- all_markers %>%
  group_by(cluster) %>%
  slice_max(n = 5, order_by = avg_log2FC)
# マーカー遺伝子テーブル表示
top5 %>%
  select(cluster, gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  arrange(cluster, desc(avg_log2FC))
# Top 3 マーカーのヒートマップ
top3 <- all_markers %>%
  group_by(cluster) %>%
  slice_max(n = 3, order_by = avg_log2FC) %>%
  pull(gene) %>%
  unique()

DoHeatmap(stromal, features = top3, size = 3) +
  ggtitle("Top 3 Markers per Cluster") +
  theme(axis.text.y = element_text(size = 7))

5. ADTデータの統合解析

# ADT (表面タンパク質) データの確認
DefaultAssay(stromal) <- "ADT"
adt_features <- rownames(stromal[["ADT"]])
cat("Available ADT features:", length(adt_features), "\n")
## Available ADT features: 137
cat(paste(head(adt_features, 30), collapse = ", "), "...\n")
## CD86.1, CD274.1, TNFRSF14.1, PVR.1, NECTIN2.1, CD47.1, CD48.1, CD40.1, CD40LG.1, CD52.1, CD3D.1, CD8A.1, NCAM1.1, CD19.1, CD33.1, ITGAX.1, HLA-A.1, PTPRC.1, IL3RA.1, CD7.1, ENG.1, ITGA6.1, CCR4.1, CD4.1, CD44.1, CD14.1, FCGR3A.1, IL2RA.1, PTPRC.2, PDCD1.1 ...
# Stromal関連のADTマーカーを探す
stromal_adt <- c("CD31", "CD34", "CD90", "CD140a", "CD140b", "CD146", "CD54", "CD106")
available_adt <- stromal_adt[stromal_adt %in% adt_features]

if (length(available_adt) > 0) {
  # ADTの正規化
  stromal <- NormalizeData(stromal, normalization.method = "CLR", margin = 2, verbose = FALSE)
  
  cat("\nAvailable stromal ADT markers:", paste(available_adt, collapse = ", "), "\n")
  
  p_adt_plots <- FeaturePlot(stromal, features = available_adt, 
                              ncol = min(4, length(available_adt)),
                              reduction = "umap")
  print(p_adt_plots)
} else {
  cat("No stromal-specific ADT markers found. Showing available markers.\n")
  # 利用可能なADTの一部を表示
  show_adt <- head(adt_features, 8)
  stromal <- NormalizeData(stromal, normalization.method = "CLR", margin = 2, verbose = FALSE)
  FeaturePlot(stromal, features = show_adt, ncol = 4, reduction = "umap")
}
## No stromal-specific ADT markers found. Showing available markers.

# RNA assayに戻す
DefaultAssay(stromal) <- "RNA"

6. アノテーション

6.1 手動アノテーション

マーカー遺伝子発現パターンとクラスター特性に基づいてアノテーションを付与。 以下は テンプレート であり、実際のクラスタリング結果に基づいて修正が必要:

# === アノテーション辞書 ===
# 各クラスター番号に対して、マーカー遺伝子の発現パターンから
# 細胞タイプ名を割り当てる。
# 
# 実行後のDotPlot・ヒートマップ・クロス集計表を参照して修正してください。

# --- ct_subtypeとの対応関係からアノテーションを推定 ---
# クラスターごとに最も多い ct_subtype を取得
cluster_annotation <- stromal@meta.data %>%
  group_by(seurat_clusters) %>%
  count(ct_subtype) %>%
  slice_max(n = 1, order_by = n) %>%
  select(seurat_clusters, ct_subtype)

cat("=== Auto-inferred annotation (majority vote from ct_subtype) ===\n")
## === Auto-inferred annotation (majority vote from ct_subtype) ===
print(as.data.frame(cluster_annotation))
##    seurat_clusters          ct_subtype
## 1                0              Venous
## 2                1  POSTN+ Fibroblasts
## 3                2   CD34+ Fibroblasts
## 4                3 CXCL12+ Fibroblasts
## 5                4           Pericytes
## 6                5      LL Fibroblasts
## 7                6 CXCL14+ Fibroblasts
## 8                7       KDR+ Arterial
## 9                8    MMP+ Fibroblasts
## 10               9   SOX5+ Fibroblasts
## 11              10          Lymphatics
## 12              11    FLI1 + Capillary
## 13              12           Pericytes
# アノテーションを適用
annotation_map <- setNames(cluster_annotation$ct_subtype, 
                           cluster_annotation$seurat_clusters)
stromal$cell_type <- unname(annotation_map[as.character(stromal$seurat_clusters)])
# === 手動修正セクション ===
# 上記の自動アノテーションが不適切なクラスターがあれば、
# 以下のように個別に修正してください。
#
# 例:
# stromal$cell_type[stromal$seurat_clusters == "5"] <- "Inflammatory Fibroblasts"
# stromal$cell_type[stromal$seurat_clusters == "10"] <- "Tip Endothelial Cells"
#
# 修正後に再度プロットを確認:

cat("=== Final Annotation ===\n")
## === Final Annotation ===
print(table(stromal$cell_type))
## 
##   CD34+ Fibroblasts CXCL12+ Fibroblasts CXCL14+ Fibroblasts    FLI1 + Capillary 
##                3777                3373                1429                 370 
##       KDR+ Arterial      LL Fibroblasts          Lymphatics    MMP+ Fibroblasts 
##                1004                1507                 531                 772 
##           Pericytes  POSTN+ Fibroblasts   SOX5+ Fibroblasts              Venous 
##                2236                4200                 641                6783

6.2 アノテーション結果の可視化

p_annot1 <- DimPlot(stromal, group.by = "seurat_clusters", label = TRUE) +
  ggtitle("Subclusters") + NoLegend()
p_annot2 <- DimPlot(stromal, group.by = "cell_type", label = TRUE, label.size = 3, repel = TRUE) +
  ggtitle("Annotated Cell Types") + NoLegend()
p_annot1 + p_annot2

# アノテーションごとのマーカー発現
Idents(stromal) <- stromal$cell_type
DotPlot(stromal, features = marker_genes) +
  RotatedAxis() +
  ggtitle("Marker Expression by Annotated Cell Type")

7. サンプル間比較

# サンプルごとの細胞タイプ組成
comp_table <- table(stromal$sample, stromal$cell_type)
comp_prop <- prop.table(comp_table, margin = 1)

comp_df <- as.data.frame(comp_prop)
names(comp_df) <- c("Sample", "CellType", "Proportion")

ggplot(comp_df, aes(x = Sample, y = Proportion, fill = CellType)) +
  geom_bar(stat = "identity", position = "stack") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Cell Type Composition by Sample") +
  scale_fill_brewer(palette = "Set3")

# 臨床群(ana)ごとの比較
if ("ana" %in% names(stromal@meta.data)) {
  comp_ana <- table(stromal$ana, stromal$cell_type)
  comp_ana_prop <- prop.table(comp_ana, margin = 1)
  
  ana_df <- as.data.frame(comp_ana_prop)
  names(ana_df) <- c("ANA_Status", "CellType", "Proportion")
  
  ggplot(ana_df, aes(x = ANA_Status, y = Proportion, fill = CellType)) +
    geom_bar(stat = "identity", position = "stack") +
    theme_minimal() +
    ggtitle("Cell Type Composition by ANA Status") +
    scale_fill_brewer(palette = "Set3")
}

8. 結果の保存

# Stromal サブセットオブジェクトを保存
saveRDS(stromal, "JIAsyno_stromal_subclustered.rds")
cat("Saved: JIAsyno_stromal_subclustered.rds\n")
## Saved: JIAsyno_stromal_subclustered.rds
# マーカー遺伝子リストを保存
write.csv(all_markers, "stromal_all_markers.csv", row.names = FALSE)
cat("Saved: stromal_all_markers.csv\n")
## Saved: stromal_all_markers.csv
# アノテーション情報を保存
annotation_df <- stromal@meta.data %>%
  select(seurat_clusters, cell_type, global1, ct_subtype) %>%
  distinct(seurat_clusters, .keep_all = TRUE) %>%
  arrange(as.numeric(as.character(seurat_clusters)))
write.csv(annotation_df, "stromal_annotation.csv", row.names = FALSE)
cat("Saved: stromal_annotation.csv\n")
## Saved: stromal_annotation.csv

9. セッション情報

sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 26.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Asia/Tokyo
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] harmony_1.2.0      Rcpp_1.0.11        pheatmap_1.0.12    RColorBrewer_1.1-3
##  [5] patchwork_1.1.3    dplyr_1.1.4        ggplot2_3.4.4      Seurat_5.2.1      
##  [9] SeuratObject_5.0.2 sp_2.1-2          
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.15.0      jsonlite_1.8.8         magrittr_2.0.3        
##   [4] spatstat.utils_3.1-2   farver_2.1.1           rmarkdown_2.25        
##   [7] vctrs_0.6.5            ROCR_1.0-11            spatstat.explore_3.3-4
##  [10] htmltools_0.5.7        sass_0.4.8             sctransform_0.4.1     
##  [13] parallelly_1.36.0      KernSmooth_2.23-22     bslib_0.6.1           
##  [16] htmlwidgets_1.6.4      ica_1.0-3              plyr_1.8.9            
##  [19] plotly_4.10.3          zoo_1.8-12             cachem_1.0.8          
##  [22] igraph_1.6.0           mime_0.12              lifecycle_1.0.4       
##  [25] pkgconfig_2.0.3        Matrix_1.6-5           R6_2.5.1              
##  [28] fastmap_1.1.1          fitdistrplus_1.1-11    future_1.33.1         
##  [31] shiny_1.8.0            digest_0.6.33          colorspace_2.1-0      
##  [34] tensor_1.5             RSpectra_0.16-1        irlba_2.3.5.1         
##  [37] labeling_0.4.3         progressr_0.14.0       fansi_1.0.6           
##  [40] spatstat.sparse_3.1-0  httr_1.4.7             polyclip_1.10-6       
##  [43] abind_1.4-5            compiler_4.3.2         withr_2.5.2           
##  [46] fastDummies_1.7.3      highr_0.10             MASS_7.3-60           
##  [49] tools_4.3.2            lmtest_0.9-40          httpuv_1.6.13         
##  [52] future.apply_1.11.1    goftest_1.2-3          glue_1.6.2            
##  [55] nlme_3.1-163           promises_1.2.1         grid_4.3.2            
##  [58] Rtsne_0.17             cluster_2.1.4          reshape2_1.4.4        
##  [61] generics_0.1.3         gtable_0.3.4           spatstat.data_3.1-4   
##  [64] tidyr_1.3.0            data.table_1.16.0      utf8_1.2.4            
##  [67] spatstat.geom_3.3-5    RcppAnnoy_0.0.21       ggrepel_0.9.4         
##  [70] RANN_2.6.1             pillar_1.9.0           stringr_1.5.1         
##  [73] spam_2.10-0            RcppHNSW_0.5.0         limma_3.58.1          
##  [76] later_1.3.2            splines_4.3.2          lattice_0.21-9        
##  [79] survival_3.5-7         deldir_2.0-2           tidyselect_1.2.0      
##  [82] miniUI_0.1.1.1         pbapply_1.7-2          knitr_1.45            
##  [85] gridExtra_2.3          scattermore_1.2        RhpcBLASctl_0.23-42   
##  [88] xfun_0.41              statmod_1.5.0          matrixStats_1.2.0     
##  [91] stringi_1.8.3          lazyeval_0.2.2         yaml_2.3.8            
##  [94] evaluate_0.23          codetools_0.2-19       tibble_3.2.1          
##  [97] cli_3.6.2              uwot_0.1.16            xtable_1.8-4          
## [100] reticulate_1.35.0      munsell_0.5.0          jquerylib_0.1.4       
## [103] globals_0.16.2         spatstat.random_3.3-2  png_0.1-8             
## [106] spatstat.univar_3.1-2  parallel_4.3.2         ellipsis_0.3.2        
## [109] presto_1.0.0           dotCall64_1.1-1        listenv_0.9.0         
## [112] viridisLite_0.4.2      scales_1.3.0           ggridges_0.5.5        
## [115] purrr_1.0.2            rlang_1.1.2            cowplot_1.1.2